library(basicspace)
library(psych)
library(dplyr)
library(readstata13)
library(tidyr)

climate =read_dta("C:/Users/lerner-josh/Dropbox/America1Room/Climate/data/climate_data_foranalyses.dta")
colnames(climate)
scale_climate = climate %>%
  select(ends_with("_is")) %>% 
  blackbox(missing = c(77, 98, 99), 
           verbose=T, dims = 4, minscale = 10)
  
plot( climate$ideology_score1, scale_climate$individuals[[2]]$c1, xlab = "PBS T1 Ideology Score", ylab = "Factor Analysis 1st Dimension")
summary(lm(scale_climate$individuals[[1]]$c1 ~ climate$ideology_score1))

pbs_qs = c("Q1C", "Q1E", "Q2A", "Q2B", "Q2C", "Q3A", "Q3B", "Q5E", "Q5F", "Q5G", "Q5H", "Q5I", "Q5N",  "Q6A", "Q6B", "Q6D", "Q6E", "Q6F", "Q7A", "Q7D", "Q7E", "Q8A", "Q9A", "Q9C", "Q9D", "Q9E", "Q9F", "Q10G", "Q11A", "Q11B", "Q12A", "Q12B")
climate = as.data.frame(climate)
scale_climate2 = climate %>%
  select(pbs_qs) %>% 
  blackbox(missing = c(77, 98, 99), 
           verbose=T, dims = 4, minscale = 10)
summary(scale_climate)
summary(scale_climate2)

scale_climate$fits
scale_climate2$fits

plot( climate$ideology_score1, scale_climate2$individuals[[2]]$c1, xlab = "PBS T1 Ideology Score", ylab = "Factor Analysis 1st Dimension")
summary(lm(scale_climate$individuals[[1]]$c1 ~ climate$ideology_score1))


###########################################################################

library(diverse)

table(climate$groupid)

race_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, RACETHNICITY) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()
#race_climate_group = as.data.frame(as.matrix(race_climate_group))
gini_race = diversity(race_climate_group, type = "gini-simpson")
gini_race$groupid = rownames(gini_race)

#Party ID

party_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, PartyID7) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_party = diversity(party_climate_group, type = "gini-simpson")
gini_party$groupid = rownames(gini_party)



educ_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, EDUC5) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_educ = diversity(educ_climate_group, type = "gini-simpson")
gini_educ$groupid = rownames(gini_party)
gini_educ = rename_at(gini_educ, vars(-groupid), ~ paste0(., ".educ"))


income_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, INCOME9) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_income = diversity(income_climate_group, type = "gini-simpson")
gini_income$groupid = rownames(gini_party)
gini_income = rename_at(gini_income, vars(-groupid), ~ paste0(., ".income"))



region_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, REGION4) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_region = diversity(region_climate_group, type = "gini-simpson")
gini_region$groupid = rownames(gini_party)
gini_region = rename_at(gini_region, vars(-groupid), ~ paste0(., ".region"))


#libcon

ideo_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, LIBCONV) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_ideo = diversity(ideo_climate_group, type = "gini-simpson")
gini_ideo$groupid = rownames(gini_ideo)

age_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid, AGE7) %>% 
  summarize(value = n()) %>% 
  ungroup() %>% 
  as.data.frame()

gini_age = diversity(age_climate_group, type = "gini-simpson")
gini_age$groupid = rownames(gini_age)




PBS_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid) %>% 
  summarize(sd_PBS = sd(ideology_score1), 
            mean_PBS = mean(ideology_score1)) %>% 
  ungroup() %>% 
  as.data.frame()

PBS_diff_climate_group = climate %>% 
  filter(!is.na(groupid)) %>% 
  group_by(groupid) %>% 
  summarize(diff = mean(ideology_diff)) %>% 
  ungroup() %>% 
  as.data.frame()
PBS_diff_climate_group$groupid = as.character(PBS_diff_climate_group$groupid)
PBS_climate_group$groupid = as.character(PBS_climate_group$groupid)

gini_age = rename_at(gini_age, vars(-groupid), ~ paste0(., ".age"))
gini_ideo = rename_at(gini_ideo, vars(-groupid), ~ paste0(., ".ideo"))
gini_party = rename_at(gini_party, vars(-groupid), ~ paste0(., ".party"))
gini_race = rename_at(gini_race, vars(-groupid), ~ paste0(., ".race"))



library(ggcorrplot)

group_differences = gini_age %>% 
  left_join(gini_ideo)  %>% 
  left_join(gini_party) %>% 
  left_join(gini_race)  %>% 
  left_join(gini_educ)  %>% 
  left_join(gini_income)  %>% 
  left_join(gini_region)  %>% 
  left_join(PBS_climate_group)  %>% 
  left_join(PBS_diff_climate_group) 
library(ggcorrplot)
group_differences_plot= group_differences %>% 
  select(contains(".R."),sd_PBS  , -groupid, -mean_PBS, -diff) %>% 
  drop_na()
group_diff_cor = cor(group_differences_plot)
row.names(group_diff_cor) = c("Gini - Age", "Gini - Ideology", "Gini - Party ID", "Gini - Race", "Gini - Education", "Gini - Income",
                              "Gini - Region", "St. Dev. PBS")
colnames(group_diff_cor) = c("Gini - Age", "Gini - Ideology", "Gini - Party ID", "Gini - Race", "Gini - Education", "Gini - Income",
                              "Gini - Region", "St. Dev. PBS")

ggcorrplot( group_diff_cor,  type = "upper",lab = TRUE,hc.order = TRUE,
           ggtheme =theme_clean())

ggplot(group_differences, aes(x=mean_PBS, y=gini.simpson.age)) +geom_point() + theme_bw()
ggplot(group_differences, aes(x=mean_PBS, y=gini.simpson.party)) +geom_point() + theme_bw()
ggplot(group_differences, aes(x=mean_PBS, y=gini.simpson.ideo)) +geom_point() + theme_bw()
ggplot(group_differences, aes(x=mean_PBS, y=gini.simpson.race)) +geom_point() + theme_bw()
ggplot(group_differences, aes(x=mean_PBS, y=sd_PBS)) +geom_point() + theme_bw()

ggplot(group_differences, aes(y=diff, x=gini.simpson.age)) +geom_point() + theme_bw()
ggplot(group_differences, aes(y=diff, x=gini.simpson.party)) +geom_point() + theme_bw()
ggplot(group_differences, aes(y=diff, x=gini.simpson.ideo)) +geom_point() + theme_bw()
ggplot(group_differences, aes(y=diff, x=gini.simpson.race)) +geom_point() + theme_bw()
ggplot(group_differences, aes(y=diff, x=sd_PBS)) +geom_point() + theme_bw()

group_characteristics = group_differences %>% 
  select(contains(".R."),sd_PBS  , groupid, mean_PBS, diff)

group_characteristics$gini_average = (group_characteristics$gini.simpson.R.age +  
                                         group_characteristics$gini.simpson.R.ideo + 
                                         group_characteristics$gini.simpson.R.party + 
                                         group_characteristics$gini.simpson.R.race + 
                                         group_characteristics$gini.simpson.R.educ +  
                                         group_characteristics$gini.simpson.R.income +  
                                         group_characteristics$gini.simpson.R.region)/7

#group_characteristics$gini_average_z = scale(group_characteristics$gini_average)
climate$groupid = as.character(climate$groupid)
climate_group = left_join(climate, group_characteristics, by="groupid")
cor(group_characteristics[, -which(names(group_characteristics) %in% c("groupid"))], use='everything')[11,]
cor(group_characteristics$gini_average, group_characteristics$sd_PBS, use='complete.obs')

plot(climate_group$ideology_diff, climate_group$gini_average)
summary(climate_group$gini_average)
climate_group$gini_group = NA
climate_group$gini_group[climate_group$gini_average >= 3.64] = "High Gini"
climate_group$gini_group[climate_group$gini_average < 3.64] = "Low Gini"

climate_group %>% 
  filter(is.na(groupid) == F) %>% 
  ggplot(aes(x=ideology_score1, y=ideology_diff, color = gini_group)) + geom_point() + 
  geom_smooth(method = "loess") + theme_classic() +
  xlab("PBS Time One") +ylab("Individual-Level Change in PBS T1-T2")

climate_group %>% 
  filter(is.na(groupid) == F) %>% 
  ggplot(aes(x=gini_average, y=ideology_diff, color = sd_PBS)) + geom_point() + geom_smooth(method = "loess") + 
  scale_color_viridis_c() + theme_classic()+xlab("Group Gini-Simpson Diversity Index") +ylab("Individual-Level Change in PBS T1-T2")

climate_group %>% 
  filter(is.na(groupid) == F) %>% 
  ggplot(aes(x=gini_average, y=belief_diff, color = sd_PBS)) + geom_point() + geom_smooth(method = "loess") + 
  scale_color_viridis_c() + theme_classic()+xlab("Group Gini-Simpson Diversity Index") +ylab("Individual-Level Change in Climate Belief T1-T2")

climate_group %>% 
  filter(is.na(groupid) == F) %>% 
  ggplot(aes(x=gini_average, y=climate_knowledge_diff, color = sd_PBS)) + geom_point() + geom_smooth(method = "loess") + 
  scale_color_viridis_c() + theme_classic()+xlab("Group Gini-Simpson Diversity Index") +ylab("Individual-Level Change in Climate Knoweledge T1-T2")


climate_group %>% 
  filter(is.na(groupid) == F) %>% 
  ggplot(aes(x=sd_PBS, y=ideology_diff, color = gini_average)) + geom_point() + geom_smooth(method = "loess") + 
  scale_color_viridis_c() + theme_classic() +xlab("Group Standard Deviation of the PBS")+ylab("Individual-LevelChange in PBS T1-T2")

summary(lm(ideology_diff ~ sd_PBS, data=climate_group))
summary(lm(ideology_diff ~ gini_average, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.age, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.ideo, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.party, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.race, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.educ, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.income, data=climate_group))
summary(lm(ideology_diff ~ gini.simpson.R.region, data=climate_group))
library(estimatr)
gini_reg1 =lm(ideology_diff ~ gini_average, data=climate_group)
gini_reg2 =lm(ideology_diff ~ gini_average +as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3 =lm(ideology_diff ~ gini_average +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)

summary(gini_reg1)
summary(gini_reg2)
summary(gini_reg3)

library(stargazer)
stargazer(gini_reg1, gini_reg2, gini_reg3, type = 'html', out = "diversity_regression.html", style="apsr", no.space =T)


gini_reg1a =lm_robust(ideology_diff ~ gini_average, data=climate_group, clusters = groupid)
gini_reg2a =lm_robust(ideology_diff ~ gini_average +as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group, clusters = groupid)
gini_reg3a =lm_robust(ideology_diff ~ gini_average +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group, clusters = groupid)

summary(gini_reg1a)
summary(gini_reg2a)
summary(gini_reg3a)


# robustness checks
climate_group$EDUC5_f = as.factor(climate_group$EDUC5)
gini_reg3a =lm(ideology_diff ~ gini_average +as.factor(party) + ideology_score1 +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3b =lm(ideology_diff ~ gini.simpson.R.age +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3c =lm(ideology_diff ~ gini.simpson.R.ideo +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3d =lm(ideology_diff ~ gini.simpson.R.party +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3e =lm(ideology_diff ~ gini.simpson.R.race +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3f =lm(ideology_diff ~ gini.simpson.R.educ +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3g =lm(ideology_diff ~ gini.simpson.R.income +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)
gini_reg3h =lm(ideology_diff ~ gini.simpson.R.region +as.factor(party) + ideology_score1         +
                as.factor(INCOME9) +GENDER + AGE + as.factor(EDUC5) + as.factor(RACETHNICITY), data=climate_group)

save(climate, climate_group,group_characteristics, file = "climate_data.RData")
